package com.shengzai.rdd

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark job meant to be launched through `spark-submit`.
 *
 * Reads student records as text, keeps the lines whose age field equals 22 and
 * writes those lines, unchanged, to an output directory. Each line is treated as
 * comma-separated with the age in the third field (index 2).
 *
 * Lines that have fewer than three fields, or whose age field is not an integer,
 * are skipped instead of failing the whole job.
 */
object Demo18SparkSubmit {

  import scala.util.Try

  /** Text file holding one comma-separated student record per line. */
  private val InputPath = "/data/hive/student1000.txt"

  /**
   * Directory the matching lines are written to. The path carries no scheme, so it
   * is resolved against the default filesystem of the cluster (the original author
   * notes this is HDFS).
   */
  private val OutputPath = "/data/spark/filterAge"

  /** Zero-based position of the age column within a record. */
  private val AgeFieldIndex = 2

  /** Age a student must have to be kept. */
  private val TargetAge = 22

  /**
   * Tells whether a raw record line describes a student of the target age.
   *
   * @param line one comma-separated record
   * @return `true` only if the age field exists, parses as an `Int` and equals
   *         [[TargetAge]]; `false` for short or non-numeric records
   */
  private def hasTargetAge(line: String): Boolean =
    line
      .split(",")
      .lift(AgeFieldIndex) // None when the record has too few fields
      .flatMap(field => Try(field.toInt).toOption) // None when the age is not an integer
      .contains(TargetAge)

  /**
   * Entry point: runs the filter job and always releases the Spark context.
   *
   * @param args command-line arguments (currently unused)
   */
  def main(args: Array[String]): Unit = {
    // No master is configured here: on a cluster it is supplied by spark-submit.
    // For a local run, add: conf.setMaster("local")
    val conf = new SparkConf()
    conf.setAppName("SparkSubmit")

    val sc = new SparkContext(conf)

    try {
      val stuRDD: RDD[String] = sc.textFile(InputPath)

      // The output is the original line itself, so a single filter on the raw
      // line is enough; no intermediate (id, line) pair is needed.
      stuRDD
        .filter(line => hasTargetAge(line))
        .saveAsTextFile(OutputPath)
    } finally {
      // Stop the context even when the job throws, so resources are released.
      sc.stop()
    }
  }

}
